In [0]:
!pip install geopandas
!apt-get -qq install python-cartopy python3-cartopy 
import cartopy 
!pip3 install geoplot 
import statsmodels.iolib.foreign as smio 
from pandas import DataFrame 
import pandas as pd 
import geopandas as gpd 
import altair as alt 
import vega_datasets as data 
import json 
import warnings 
import numpy as np 
import matplotlib.pyplot as plt 
import geoplot as gplt 
import geoplot.crs as gcrs 
warnings.filterwarnings('ignore')
Collecting geopandas
  Downloading https://files.pythonhosted.org/packages/52/4f/6440a63c9367d981a91de458467eed4a8e259a26f24158071b610a1ed1dd/geopandas-0.6.3-py2.py3-none-any.whl (920kB)
     |████████████████████████████████| 921kB 3.4MB/s 
Requirement already satisfied: shapely in /usr/local/lib/python3.6/dist-packages (from geopandas) (1.7.0)
Collecting pyproj
  Downloading https://files.pythonhosted.org/packages/d6/70/eedc98cd52b86de24a1589c762612a98bea26cde649ffdd60c1db396cce8/pyproj-2.4.2.post1-cp36-cp36m-manylinux2010_x86_64.whl (10.1MB)
     |████████████████████████████████| 10.1MB 62.8MB/s 
Collecting fiona
  Downloading https://files.pythonhosted.org/packages/50/f7/9899f8a9a2e38601472fe1079ce5088f58833221c8b8507d8b5eafd5404a/Fiona-1.8.13-cp36-cp36m-manylinux1_x86_64.whl (11.8MB)
     |████████████████████████████████| 11.8MB 54.9MB/s 
Requirement already satisfied: pandas>=0.23.0 in /usr/local/lib/python3.6/dist-packages (from geopandas) (0.25.3)
Collecting click-plugins>=1.0
  Downloading https://files.pythonhosted.org/packages/e9/da/824b92d9942f4e472702488857914bdd50f73021efea15b4cad9aca8ecef/click_plugins-1.1.1-py2.py3-none-any.whl
Requirement already satisfied: six>=1.7 in /usr/local/lib/python3.6/dist-packages (from fiona->geopandas) (1.12.0)
Collecting cligj>=0.5
  Downloading https://files.pythonhosted.org/packages/e4/be/30a58b4b0733850280d01f8bd132591b4668ed5c7046761098d665ac2174/cligj-0.5.0-py3-none-any.whl
Requirement already satisfied: click<8,>=4.0 in /usr/local/lib/python3.6/dist-packages (from fiona->geopandas) (7.0)
Requirement already satisfied: attrs>=17 in /usr/local/lib/python3.6/dist-packages (from fiona->geopandas) (19.3.0)
Collecting munch
  Downloading https://files.pythonhosted.org/packages/cc/ab/85d8da5c9a45e072301beb37ad7f833cd344e04c817d97e0cc75681d248f/munch-2.5.0-py2.py3-none-any.whl
Requirement already satisfied: python-dateutil>=2.6.1 in /usr/local/lib/python3.6/dist-packages (from pandas>=0.23.0->geopandas) (2.6.1)
Requirement already satisfied: numpy>=1.13.3 in /usr/local/lib/python3.6/dist-packages (from pandas>=0.23.0->geopandas) (1.17.5)
Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas>=0.23.0->geopandas) (2018.9)
Installing collected packages: pyproj, click-plugins, cligj, munch, fiona, geopandas
Successfully installed click-plugins-1.1.1 cligj-0.5.0 fiona-1.8.13 geopandas-0.6.3 munch-2.5.0 pyproj-2.4.2.post1
Selecting previously unselected package python-pkg-resources.
(Reading database ... 145113 files and directories currently installed.)
Preparing to unpack .../0-python-pkg-resources_39.0.1-2_all.deb ...
Unpacking python-pkg-resources (39.0.1-2) ...
Selecting previously unselected package python-pyshp.
Preparing to unpack .../1-python-pyshp_1.2.12+ds-1_all.deb ...
Unpacking python-pyshp (1.2.12+ds-1) ...
Selecting previously unselected package python-shapely.
Preparing to unpack .../2-python-shapely_1.6.4-1_amd64.deb ...
Unpacking python-shapely (1.6.4-1) ...
Selecting previously unselected package python-six.
Preparing to unpack .../3-python-six_1.11.0-2_all.deb ...
Unpacking python-six (1.11.0-2) ...
Selecting previously unselected package python-cartopy:amd64.
Preparing to unpack .../4-python-cartopy_0.14.2+dfsg1-2build3_amd64.deb ...
Unpacking python-cartopy:amd64 (0.14.2+dfsg1-2build3) ...
Selecting previously unselected package python3-pkg-resources.
Preparing to unpack .../5-python3-pkg-resources_39.0.1-2_all.deb ...
Unpacking python3-pkg-resources (39.0.1-2) ...
Selecting previously unselected package python3-pyshp.
Preparing to unpack .../6-python3-pyshp_1.2.12+ds-1_all.deb ...
Unpacking python3-pyshp (1.2.12+ds-1) ...
Selecting previously unselected package python3-shapely.
Preparing to unpack .../7-python3-shapely_1.6.4-1_amd64.deb ...
Unpacking python3-shapely (1.6.4-1) ...
Selecting previously unselected package python3-six.
Preparing to unpack .../8-python3-six_1.11.0-2_all.deb ...
Unpacking python3-six (1.11.0-2) ...
Selecting previously unselected package python3-cartopy:amd64.
Preparing to unpack .../9-python3-cartopy_0.14.2+dfsg1-2build3_amd64.deb ...
Unpacking python3-cartopy:amd64 (0.14.2+dfsg1-2build3) ...
Setting up python-shapely (1.6.4-1) ...
Setting up python-pyshp (1.2.12+ds-1) ...
Setting up python3-six (1.11.0-2) ...
Setting up python3-shapely (1.6.4-1) ...
Setting up python3-pyshp (1.2.12+ds-1) ...
Setting up python3-pkg-resources (39.0.1-2) ...
Setting up python-pkg-resources (39.0.1-2) ...
Setting up python-six (1.11.0-2) ...
Setting up python3-cartopy:amd64 (0.14.2+dfsg1-2build3) ...
Setting up python-cartopy:amd64 (0.14.2+dfsg1-2build3) ...
Collecting geoplot
  Downloading https://files.pythonhosted.org/packages/a0/40/40c264002dfba4736f35c3524f6a0e7a03458fb4fcc436ce501f541359b7/geoplot-0.4.0-py3-none-any.whl
Requirement already satisfied: matplotlib in /usr/local/lib/python3.6/dist-packages (from geoplot) (3.1.3)
Requirement already satisfied: pandas in /usr/local/lib/python3.6/dist-packages (from geoplot) (0.25.3)
Requirement already satisfied: descartes in /usr/local/lib/python3.6/dist-packages (from geoplot) (1.1.0)
Requirement already satisfied: seaborn in /usr/local/lib/python3.6/dist-packages (from geoplot) (0.10.0)
Requirement already satisfied: geopandas in /usr/local/lib/python3.6/dist-packages (from geoplot) (0.6.3)
Collecting mapclassify
  Downloading https://files.pythonhosted.org/packages/91/b5/6b54f40901d89f2ce30cf2d8110dd57658db3e95db79baba6706d7588691/mapclassify-2.2.0.tar.gz (47kB)
     |████████████████████████████████| 51kB 3.2MB/s 
Collecting contextily>=1.0rc2
  Downloading https://files.pythonhosted.org/packages/bb/e0/fd2f860ac34c97c1370514a1fa37d33c46dca5bea8a6f49dc167fbffbc1f/contextily-1.0rc2-py3-none-any.whl
Requirement already satisfied: cartopy in /usr/lib/python3/dist-packages (from geoplot) (0.14.2)
Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.6/dist-packages (from matplotlib->geoplot) (0.10.0)
Requirement already satisfied: numpy>=1.11 in /usr/local/lib/python3.6/dist-packages (from matplotlib->geoplot) (1.17.5)
Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->geoplot) (1.1.0)
Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->geoplot) (2.4.6)
Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->geoplot) (2.6.1)
Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas->geoplot) (2018.9)
Requirement already satisfied: scipy>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from seaborn->geoplot) (1.4.1)
Requirement already satisfied: pyproj in /usr/local/lib/python3.6/dist-packages (from geopandas->geoplot) (2.4.2.post1)
Requirement already satisfied: shapely in /usr/local/lib/python3.6/dist-packages (from geopandas->geoplot) (1.7.0)
Requirement already satisfied: fiona in /usr/local/lib/python3.6/dist-packages (from geopandas->geoplot) (1.8.13)
Requirement already satisfied: scikit-learn in /usr/local/lib/python3.6/dist-packages (from mapclassify->geoplot) (0.22.1)
Collecting deprecated
  Downloading https://files.pythonhosted.org/packages/f6/89/62912e01f3cede11edcc0abf81298e3439d9c06c8dce644369380ed13f6d/Deprecated-1.2.7-py2.py3-none-any.whl
Collecting rasterio
  Downloading https://files.pythonhosted.org/packages/be/e5/7052a3eef72af7e883a280d8dff64f4ea44cb92ec25ffb1d00ce27bc1a12/rasterio-1.1.2-cp36-cp36m-manylinux1_x86_64.whl (18.0MB)
     |████████████████████████████████| 18.0MB 201kB/s 
Collecting mercantile
  Downloading https://files.pythonhosted.org/packages/9d/1d/80d28ba17e4647bf820e8d5f485d58f9da9c5ca424450489eb49e325ba66/mercantile-1.1.2-py3-none-any.whl
Requirement already satisfied: geopy in /usr/local/lib/python3.6/dist-packages (from contextily>=1.0rc2->geoplot) (1.17.0)
Requirement already satisfied: pillow in /usr/local/lib/python3.6/dist-packages (from contextily>=1.0rc2->geoplot) (6.2.2)
Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from contextily>=1.0rc2->geoplot) (2.21.0)
Requirement already satisfied: joblib in /usr/local/lib/python3.6/dist-packages (from contextily>=1.0rc2->geoplot) (0.14.1)
Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from cycler>=0.10->matplotlib->geoplot) (1.12.0)
Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from kiwisolver>=1.0.1->matplotlib->geoplot) (45.1.0)
Requirement already satisfied: click-plugins>=1.0 in /usr/local/lib/python3.6/dist-packages (from fiona->geopandas->geoplot) (1.1.1)
Requirement already satisfied: munch in /usr/local/lib/python3.6/dist-packages (from fiona->geopandas->geoplot) (2.5.0)
Requirement already satisfied: click<8,>=4.0 in /usr/local/lib/python3.6/dist-packages (from fiona->geopandas->geoplot) (7.0)
Requirement already satisfied: cligj>=0.5 in /usr/local/lib/python3.6/dist-packages (from fiona->geopandas->geoplot) (0.5.0)
Requirement already satisfied: attrs>=17 in /usr/local/lib/python3.6/dist-packages (from fiona->geopandas->geoplot) (19.3.0)
Requirement already satisfied: wrapt<2,>=1.10 in /usr/local/lib/python3.6/dist-packages (from deprecated->mapclassify->geoplot) (1.11.2)
Collecting affine
  Downloading https://files.pythonhosted.org/packages/ac/a6/1a39a1ede71210e3ddaf623982b06ecfc5c5c03741ae659073159184cd3e/affine-2.3.0-py2.py3-none-any.whl
Collecting snuggs>=1.4.1
  Downloading https://files.pythonhosted.org/packages/cc/0e/d27d6e806d6c0d1a2cfdc5d1f088e42339a0a54a09c3343f7f81ec8947ea/snuggs-1.4.7-py3-none-any.whl
Requirement already satisfied: geographiclib<2,>=1.49 in /usr/local/lib/python3.6/dist-packages (from geopy->contextily>=1.0rc2->geoplot) (1.50)
Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->contextily>=1.0rc2->geoplot) (3.0.4)
Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->contextily>=1.0rc2->geoplot) (2.8)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->contextily>=1.0rc2->geoplot) (2019.11.28)
Requirement already satisfied: urllib3<1.25,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->contextily>=1.0rc2->geoplot) (1.24.3)
Building wheels for collected packages: mapclassify
  Building wheel for mapclassify (setup.py) ... done
  Created wheel for mapclassify: filename=mapclassify-2.2.0-cp36-none-any.whl size=47028 sha256=90c228d3a6d01a4c71393e05318e14c61a63f2890bf82bf6d010f415acf6f2e0
  Stored in directory: /root/.cache/pip/wheels/96/0c/c1/b2bce9fc10da2e6c6befc9fe5929adca7c71b7c4abd1fc42f0
Successfully built mapclassify
Installing collected packages: deprecated, mapclassify, affine, snuggs, rasterio, mercantile, contextily, geoplot
Successfully installed affine-2.3.0 contextily-1.0rc2 deprecated-1.2.7 geoplot-0.4.0 mapclassify-2.2.0 mercantile-1.1.2 rasterio-1.1.2 snuggs-1.4.7
In [0]:
from google.colab import drive
drive.mount('/content/drive')
Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive
In [0]:
cd "/content/drive/My Drive/Datavisualization"
/content/drive/My Drive/Datavisualization

Population aging in US

US population aging is proceeding slowly but clearly

In [0]:
urls = ["https://docs.google.com/spreadsheets/d/1sUA47Q8OrH18YI4q_DOP7OWIoHX2i8kH0fk5E4lFszE/export?format=csv&gid=1048618207",
        "https://docs.google.com/spreadsheets/d/18KT-PC9IXKqYL-wd5zHHzYV4K8eZ9mgUe98XaFANMQM/export?format=csv&gid=2072191966", 
        "https://docs.google.com/spreadsheets/d/1p_fq_sG3nYnHdOkLVglxXF2zmT1XrPK0c-538lcFIAg/export?format=csv&gid=1688370404",
        "https://docs.google.com/spreadsheets/d/1ymCdsBHgamJis3_2PK0IZS_VZvls5Umn-ng9o4S0Sro/export?format=csv&gid=119942101",
        "https://docs.google.com/spreadsheets/d/1AqUeUfzuAUG5pCLJGbrysCcTxICCrHsA8rRE1ffHZpU/export?format=csv&gid=1384942932",
        "https://docs.google.com/spreadsheets/d/1zvfu9Nc27aGo2ovAn2kqofv-iVXO61-n-dQ4DIrpbUs/export?format=csv&gid=2105631292",
        "https://docs.google.com/spreadsheets/d/1XAXIQeLrgxgEx6CCHxWOmfgmInRxwCxu-4vw1PR8dp8/export?format=csv&gid=1064535711",
        "https://docs.google.com/spreadsheets/d/1xad1XOm-J4T13rYFJ7BBeskaaCV88BAQZExqNX3aX0w/export?format=csv&gid=95139586",
        "https://docs.google.com/spreadsheets/d/1pBj5q5NkzpZUKeOqFZCGb0RdzV1mvS8rw2iXWcwYKbc/export?format=csv&gid=616415653",
        "https://docs.google.com/spreadsheets/d/1dZBbXEKSRqhR8Yy9_zFBAjzdw_Gyu3czYT0GNcGhrZI/export?format=csv&gid=687388009",
        "https://docs.google.com/spreadsheets/d/1pReietRze4QKpMOM6dbJeJ4bdekcGgrxcaC5k4Qv8C8/export?format=csv&gid=2108461693",
        "https://docs.google.com/spreadsheets/d/1KkqQ8U4vL5Tjy7Cq16FQD7xi3qnfPnyYiMYQuiJ6Hkc/export?format=csv&gid=957537605"]

dfs =[]

for url in urls:
  df = pd.read_csv(url, encoding='latin-1')[1:]
  dfs.append(df)

pop65_conc = pd.concat(dfs)

col_use = ["date", "HC01_EST_VC01", "HC02_EST_VC01"]

pop65_conc = pop65_conc[col_use]

pop65_conc["HC01_EST_VC01"]=pop65_conc["HC01_EST_VC01"].astype("int")
pop65_conc["HC02_EST_VC01"]=pop65_conc["HC02_EST_VC01"].astype("int")

pop65_conc["pop65_rate"] = pop65_conc["HC02_EST_VC01"]/pop65_conc["HC01_EST_VC01"]

pop65_conc["date"] =pd.to_datetime(pop65_conc["date"])
pop65_conc["year"] = pop65_conc["date"].dt.year
pop65_conc = pop65_conc.drop("date", axis=1)
pd.set_option('display.max_rows', 10000)
pop65_conc["year"] = pop65_conc["year"].astype("str") 
In [0]:
step = 30
overlap = 0.5

alt.themes.enable('latimes')

chart = alt.Chart(pop65_conc, height=step, width=700).transform_joinaggregate(
    mean_rate='mean(pop65_rate)', groupby=['year']
).transform_bin(
   ['bin_max', 'bin_min'], field='pop65_rate', bin=alt.BinParams(step=0.03)
).transform_aggregate(
    value='count()', groupby=['year','mean_rate', 'bin_min', 'bin_max']
).transform_impute(
    impute='value', groupby=['year','mean_rate'], key='bin_min', value=0
).mark_area(
    interpolate='monotone',
    fillOpacity=0.8,
    stroke='lightgray',
    strokeWidth=0.5
).encode(
    alt.X('bin_min:Q', bin='binned', title='population rate of over 65',
        axis=alt.Axis(title='Population rate of over age 65', ),
        scale=alt.Scale(domain=(0.05, 0.30))),
    alt.Y(
        'value:Q',
        scale=alt.Scale(range=[step, -step * overlap]),
        axis= None 
    ),
    alt.Fill(
        'mean_rate:Q',
        legend=alt.Legend(title="Population rate of over age65"),
        scale=alt.Scale(domain=[0.16, 0.11], scheme='inferno')
    )
).facet(
    row = alt.Row(
        'year:N',
        title="year",
        header=alt.Header(titleFontSize=13, labelAngle=0, labelFontSize=12, labelAlign="left")
    )
).properties(
    title={
      "text": ["US Population has been ageing clearly in last 10 years"], 
      "subtitle": ["Distribution of over65 population rate for each county"],
      "color": "blue",
      "subtitleColor": "black",
      "subtitleFontSize":18,
      "subtitleFontWeight":"bold"
    },
    bounds='flush'
).configure_facet(
    spacing=0
).configure_view(
    stroke=None
).configure_title(
    anchor='end'
)

config = chart.configure_title(
    fontSize=24,
    fontWeight="bold",
    anchor="middle"
).configure_legend(
    titleColor='black',
    titleFontSize=13,
    titleFontWeight="bold",
    orient='right',
    fillColor="#ebf5ff",
    padding=10
).configure_axisX(
    titleFontWeight="bold",
    labelFontSize=11,
    grid=True,
    gridDash=[2,3]
)

config
Out[0]:
In [0]:
sc00_18 = pd.read_csv("sc00_18.csv")

sc00_18_wrkag = sc00_18[(sc00_18["AGE"]>=15) & (sc00_18["AGE"]<=64)]

sc00_18_ov65 = sc00_18[(sc00_18["AGE"]>=65) & (sc00_18["AGE"]!=999)]

sc00_18_wrkag_div = sc00_18_wrkag.groupby(["DIVISION"]).sum()

sc00_18_ov65_div = sc00_18_ov65.groupby(["DIVISION"]).sum()

sc00_18_wrkag_divt =sc00_18_wrkag_div.T[8:]

div_dic = {0:"USA", 1:"New England", 2:"Mid-Atlantic", 3:"East North Central", 4:"West North Central", \
       5:"South Atlantic", 6:"East South Central", 7:"West South Central", 8:"Mountain", 9:"Pacific"}

div_list = []
for i in range(0, 10):
    sc00_18_wrkag_divt_col = sc00_18_wrkag_divt[i].reset_index()
    year = sc00_18_wrkag_divt_col["index"].apply(lambda x: x[6:10])
    sc00_18_wrkag_divt_col["index"]=year 
    sc00_18_wrkag_divt_col = sc00_18_wrkag_divt_col.rename(columns={"index":"year", i:"pop"})
    sc00_18_wrkag_divt_col["pct_chg"] = sc00_18_wrkag_divt_col["pop"].pct_change()
    sc00_18_wrkag_divt_col["pct_chg"][0] = 0
    sc00_18_wrkag_divt_col["pct_chg"] = sc00_18_wrkag_divt_col["pct_chg"]*100 +100

    sc00_18_wrkag_divt_col

    sc00_18_wrkag_divt_col["div"] = i
    
    div = sc00_18_wrkag_divt_col["div"].apply(lambda x: div_dic[i])
    
    sc00_18_wrkag_divt_col["div"] = div
    
    div_list.append(sc00_18_wrkag_divt_col)

conc_div = pd.concat(div_list)
In [0]:
conc_div
Out[0]:
year pop pct_chg div
0 2000 185553036 100.000000 USA
1 2001 187958685 101.296475 USA
2 2002 190202120 101.193579 USA
3 2003 192273017 101.088788 USA
4 2004 194509615 101.163241 USA
5 2005 196905561 101.231788 USA
6 2006 199184434 101.157343 USA
7 2007 201262884 101.043480 USA
8 2008 202832886 100.780075 USA
9 2009 204328489 100.737357 USA
10 2010 206411378 101.019383 USA
11 2011 207840779 100.692501 USA
12 2012 208384607 100.261656 USA
13 2013 209108775 100.347515 USA
14 2014 209937223 100.396180 USA
15 2015 210878728 100.448470 USA
16 2016 211669432 100.374957 USA
17 2017 212199885 100.250604 USA
18 2018 212663876 100.218658 USA
0 2000 9234550 100.000000 New England
1 2001 9340847 101.151079 New England
2 2002 9433530 100.992233 New England
3 2003 9509100 100.801079 New England
4 2004 9564599 100.583641 New England
5 2005 9622218 100.602419 New England
6 2006 9681877 100.620013 New England
7 2007 9727718 100.473472 New England
8 2008 9772388 100.459203 New England
9 2009 9820756 100.494946 New England
10 2010 9835211 100.147188 New England
11 2011 9881953 100.475252 New England
12 2012 9881526 99.995679 New England
13 2013 9886797 100.053342 New England
14 2014 9901681 100.150544 New England
15 2015 9892384 99.906107 New England
16 2016 9877529 99.849834 New England
17 2017 9868303 99.906596 New England
18 2018 9860687 99.922824 New England
0 2000 26096104 100.000000 Mid-Atlantic
1 2001 26286764 100.730607 Mid-Atlantic
2 2002 26464015 100.674298 Mid-Atlantic
3 2003 26634828 100.645454 Mid-Atlantic
4 2004 26813127 100.669421 Mid-Atlantic
5 2005 26985919 100.644431 Mid-Atlantic
6 2006 27152060 100.615658 Mid-Atlantic
7 2007 27286855 100.496445 Mid-Atlantic
8 2008 27362456 100.277060 Mid-Atlantic
9 2009 27484363 100.445527 Mid-Atlantic
10 2010 27592712 100.394221 Mid-Atlantic
11 2011 27702306 100.397185 Mid-Atlantic
12 2012 27645640 99.795447 Mid-Atlantic
13 2013 27609418 99.868978 Mid-Atlantic
14 2014 27542131 99.756290 Mid-Atlantic
15 2015 27453041 99.676532 Mid-Atlantic
16 2016 27328207 99.545282 Mid-Atlantic
17 2017 27183293 99.469727 Mid-Atlantic
18 2018 27057160 99.535991 Mid-Atlantic
0 2000 29783067 100.000000 East North Central
1 2001 30011754 100.767842 East North Central
2 2002 30208247 100.654720 East North Central
3 2003 30385724 100.587512 East North Central
4 2004 30558598 100.568932 East North Central
5 2005 30751305 100.630615 East North Central
6 2006 30914011 100.529103 East North Central
7 2007 31032248 100.382471 East North Central
8 2008 31068041 100.115341 East North Central
9 2009 31128149 100.193472 East North Central
10 2010 31005385 99.605617 East North Central
11 2011 31070360 100.209560 East North Central
12 2012 30980394 99.710444 East North Central
13 2013 30958056 99.927896 East North Central
14 2014 30906887 99.834715 East North Central
15 2015 30821805 99.724715 East North Central
16 2016 30723858 99.682215 East North Central
17 2017 30622152 99.668967 East North Central
18 2018 30513108 99.643905 East North Central
0 2000 12542654 100.000000 West North Central
1 2001 12656938 100.911163 West North Central
2 2002 12761858 100.828952 West North Central
3 2003 12855572 100.734329 West North Central
4 2004 12964043 100.843766 West North Central
5 2005 13083182 100.918996 West North Central
6 2006 13193770 100.845268 West North Central
7 2007 13293836 100.758434 West North Central
8 2008 13370367 100.575688 West North Central
9 2009 13446912 100.572497 West North Central
10 2010 13520388 100.546415 West North Central
11 2011 13584699 100.475659 West North Central
12 2012 13597629 100.095181 West North Central
13 2013 13628370 100.226076 West North Central
14 2014 13654793 100.193882 West North Central
15 2015 13675071 100.148505 West North Central
16 2016 13673669 99.989748 West North Central
17 2017 13666210 99.945450 West North Central
18 2018 13661755 99.967401 West North Central
0 2000 34037384 100.000000 South Atlantic
1 2001 34637823 101.764057 South Atlantic
2 2002 35240314 101.739402 South Atlantic
3 2003 35785247 101.546334 South Atlantic
4 2004 36454642 101.870589 South Atlantic
5 2005 37188921 102.014226 South Atlantic
6 2006 37846876 101.769223 South Atlantic
7 2007 38373272 101.390857 South Atlantic
8 2008 38716916 100.895530 South Atlantic
9 2009 39014299 100.768096 South Atlantic
10 2010 39747533 101.879398 South Atlantic
11 2011 40107812 100.906419 South Atlantic
12 2012 40283492 100.438019 South Atlantic
13 2013 40471640 100.467060 South Atlantic
14 2014 40723329 100.621890 South Atlantic
15 2015 41070750 100.853125 South Atlantic
16 2016 41413942 100.835612 South Atlantic
17 2017 41714903 100.726714 South Atlantic
18 2018 41965488 100.600709 South Atlantic
0 2000 11287742 100.000000 East South Central
1 2001 11359712 100.637594 East South Central
2 2002 11433101 100.646046 East South Central
3 2003 11520385 100.763432 East South Central
4 2004 11610247 100.780026 East South Central
5 2005 11734779 101.072604 East South Central
6 2006 11867441 101.130503 East South Central
7 2007 11965568 100.826859 East South Central
8 2008 12057024 100.764326 East South Central
9 2009 12102510 100.377257 East South Central
10 2010 12262679 101.323436 East South Central
11 2011 12303093 100.329569 East South Central
12 2012 12291416 99.905089 East South Central
13 2013 12307515 100.130978 East South Central
14 2014 12304785 99.977818 East South Central
15 2015 12313573 100.071419 East South Central
16 2016 12321734 100.066276 East South Central
17 2017 12331490 100.079177 East South Central
18 2018 12332494 100.008142 East South Central
0 2000 20732403 100.000000 West South Central
1 2001 21033103 101.450387 West South Central
2 2002 21331365 101.418060 West South Central
3 2003 21611636 101.313892 West South Central
4 2004 21894571 101.309179 West South Central
5 2005 22203545 101.411190 West South Central
6 2006 22460870 101.158937 West South Central
7 2007 22881154 101.871183 West South Central
8 2008 23232498 101.535517 West South Central
9 2009 23567141 101.440409 West South Central
10 2010 24124653 102.365633 West South Central
11 2011 24455376 101.370892 West South Central
12 2012 24695458 100.981715 West South Central
13 2013 24952941 101.042633 West South Central
14 2014 25257601 101.220938 West South Central
15 2015 25601541 101.361729 West South Central
16 2016 25872992 101.060292 West South Central
17 2017 26062101 100.730913 West South Central
18 2018 26249324 100.718373 West South Central
0 2000 12019297 100.000000 Mountain
1 2001 12307396 102.396970 Mountain
2 2002 12580141 102.216106 Mountain
3 2003 12824035 101.938722 Mountain
4 2004 13107457 102.210084 Mountain
5 2005 13452885 102.635355 Mountain
6 2006 13822890 102.750377 Mountain
7 2007 14134748 102.256098 Mountain
8 2008 14369089 101.657907 Mountain
9 2009 14545062 101.224664 Mountain
10 2010 14551636 100.045197 Mountain
11 2011 14685362 100.918976 Mountain
12 2012 14789514 100.709223 Mountain
13 2013 14907596 100.798417 Mountain
14 2014 15044486 100.918257 Mountain
15 2015 15222518 101.183370 Mountain
16 2016 15417289 101.279493 Mountain
17 2017 15588968 101.113549 Mountain
18 2018 15786094 101.264522 Mountain
0 2000 29819835 100.000000 Pacific
1 2001 30324348 101.691871 Pacific
2 2002 30749549 101.402177 Pacific
3 2003 31146490 101.290884 Pacific
4 2004 31542331 101.270901 Pacific
5 2005 31882807 101.079426 Pacific
6 2006 32244639 101.134881 Pacific
7 2007 32567485 101.001239 Pacific
8 2008 32884107 100.972203 Pacific
9 2009 33219297 101.019307 Pacific
10 2010 33771181 101.661336 Pacific
11 2011 34049818 100.825073 Pacific
12 2012 34219538 100.498446 Pacific
13 2013 34386442 100.487745 Pacific
14 2014 34601530 100.625502 Pacific
15 2015 34828045 100.654639 Pacific
16 2016 35040212 100.609184 Pacific
17 2017 35162465 100.348893 Pacific
18 2018 35237766 100.214152 Pacific
In [0]:
conc_div["a"] =100

box = pd.DataFrame({'x1': [2000], 'x2': [2018], 'y1': [0], 'y2': [100]})

domain = ["USA", "New England", "Mid-Atlantic","East North Central", "West North Central",\
          "South Atlantic", "East South Central", "West South Central", "Mountain", "Pacific"]

range_ = ['red', 'blue', 'green','orange','pink','light-gray','light-gray','light-gray','light-gray','light-grey']

chart = alt.Chart(conc_div, width=800, height=300).mark_line().encode(
    x=alt.X('year:O',
    axis=alt.Axis(title="year")),
    y = alt.Y('pct_chg:Q',
        scale=alt.Scale(domain=(99, 103),),
        axis=alt.Axis(title="percent_change(indexed to 100)")
    ),
    color=alt.Color('div:N', scale=alt.Scale(domain=domain, range=range_),
                   legend=alt.Legend(title="Division",
                                     values=["USA", "New England", "Mid-Atlantic","East North Central","West North Central",
                                             "(light-grey)Other divisions"]))
).properties(
    title={
      "text": ["Working-age population starts declining in some areas"], 
      "subtitle": ["The change rate of working-age population for each division"],
      "color": "blue",
      "subtitleColor": "black",
      "subtitleFontSize":18,
      "subtitleFontWeight":"bold"
    }
)

box = alt.Chart(conc_div).mark_rect(fill='#75bbff', stroke='none',strokeOpacity=0, fillOpacity=0.03).encode(
    alt.X('year:N'),
    alt.Y('a')
)

rule = alt.Chart(conc_div).mark_rule(color='black', strokeDash=[2,3], size=2).encode(
    y='a:Q'
)

config = (chart+box+rule).configure_title(
    fontSize=24,
    fontWeight="bold",
    anchor="middle"
).configure_legend(
    titleColor='black',
    titleFontSize=12,
    titleFontWeight="bold",
    orient='right',
    fillColor="#ebf5ff",
    padding=10
).configure_axisX(
    labelFontSize=10,
    labelAngle=-45,
    grid=True,
    gridDash=[2,3],
    titleFontWeight="bold"
).configure_axisY(
    labelFontSize=11,
    grid=True,
    gridDash=[2,3],
    titleFontWeight="bold"
    )

config
Out[0]:
In [0]:
sc00_18_rate = sc00_18_wrkag_div

for i in range(8, len(sc00_18_rate.columns)):
    sc00_18_rate.iloc[:,i] = np.divide(sc00_18_rate.iloc[:,i], sc00_18_ov65_div.iloc[:, i])

div_dic = {0:"USA", 1:"New England", 2:"Mid-Atlantic", 3:"East North Central", 4:"West North Central", \
       5:"South Atlantic", 6:"East South Central", 7:"West South Central", 8:"Mountain", 9:"Pacific"}

sc00_18_rate2000 = sc00_18_rate.iloc[:, 8].reset_index()
sc00_18_rate2000["DIVISION"] = sc00_18_rate2000["DIVISION"].apply(lambda x: div_dic[x])
sc00_18_rate2000 = sc00_18_rate2000.rename(columns={"DIVISION":"Division", "POPEST2000_CIV": "per_wk"})
sc00_18_rate2000["Division"][2] = "Middle Atlantic"

sc00_18_rate2018 = sc00_18_rate.iloc[:,26].reset_index()
sc00_18_rate2018["DIVISION"] = sc00_18_rate2018["DIVISION"].apply(lambda x: div_dic[x])
sc00_18_rate2018 = sc00_18_rate2018.rename(columns={"DIVISION":"Division", "POPEST2018_CIV": "per_wk"})
sc00_18_rate2018["Division"][2] = "Middle Atlantic"

sc00_18_rate2000_div = sc00_18_rate2000[1:]
sc00_18_rate2018_div = sc00_18_rate2018[1:]

div = gpd.read_file("cb_2018_us_division_500k.shp")
col_use=["NAME","geometry"]
div = div[col_use]
div = div.rename(columns={"NAME":"Division"}) 

gdf1 = pd.merge(div, sc00_18_rate2000_div)

div["lon_lat"]= div.centroid
div2=div.drop(["geometry"], axis=1)
div2 = div2.rename(columns={"lon_lat":"geometry"})
div2["longitude"]= div2.geometry.x
div2["latitude"]= div2.geometry.y

div3 = pd.merge(div2, sc00_18_rate2000_div)
div3["per_wk"] = div3["per_wk"]
div3["longitude"] = div3["longitude"]

div4 = pd.merge(div2, sc00_18_rate2018_div)
div4["per_wk"] = div4["per_wk"]
div4["longitude"] = div4["longitude"]
In [0]:
choro_json = json.loads(gdf1.to_json())
choro_data = alt.Data(values=choro_json['features'])

base = alt.Chart(choro_data).mark_geoshape(
        fill="lightgrey",
        stroke='white',
        strokeWidth=1
    ).encode(
    ).project(
    type='albersUsa'
    ).properties(
        title={
      "text": "Lesser younger generation against elderly people", 
      "subtitle": ["The number of 65 over population per working-age person in 2000 and 2018"],
      "color": "blue",
      "subtitleColor": "black",
      "subtitleFontSize":18,
      "subtitleFontWeight":"bold"
    },width=800,
    height=550
    )

points1 = alt.Chart(div3).mark_circle(color="red", fillOpacity=0.8).encode(
    latitude="latitude:Q",
    longitude="longitude:Q",
    size=alt.Size("per_wk:Q",scale=alt.Scale(domain=[3.5, 6.1], range=[0, 3000]),
                  legend=alt.Legend(title="65over/working-age(2000)")
                  ),
    tooltip=["Division:N", "per_wk:Q"]
)

points2 = alt.Chart(div4).mark_circle(color="blue", fillOpacity=0.8).encode(
    latitude="latitude:Q",
    longitude="longitude:Q",
    size=alt.Size("per_wk:Q",scale=alt.Scale(domain=[3.5, 6.1], range=[0, 3000]),
                  legend=alt.Legend(title="65over/working-age(2018)")
),
    tooltip=["Division:N", "per_wk:Q"]
)

points = (points1 + points2).resolve_scale(size='independent')

config = (base + points)

config = config.configure_title(
    fontSize=24,
    fontWeight="bold", 
    anchor="middle"
).configure_legend(
    titleColor='black',
    titleFontSize=12,
    titleFontWeight="bold",
    symbolType="circle",
    orient='right',
    fillColor="#ebf5ff",
    padding=10
).configure_axisX(
    labelFontSize=10,
    labelAngle=-45
).configure_axisY(
    labelFontSize=10)

config
Out[0]:

1. Choropleth map for fertility rate per county(Revised)

Load data and preprocessing

In [0]:
df = pd.read_csv("fertility_data2.csv")

county = gpd.read_file("UScounties.shp")

county = county.rename(columns={"FIPS":"geoid"})
county["geoid"]= county["geoid"].astype(int)
county = county[["geoid", "geometry"]]

gdf_fert = county.merge(df, on="geoid")

choro_json = json.loads(gdf_fert.to_json())
choro_data = alt.Data(values=choro_json['features'])
In [0]:
df.iloc[1,:]["fert_13_17"]
Out[0]:
50.0
In [0]:
def evaluateForBivariate(row, x_bl, x_bh, y_bl, y_bh):
    colorMatrix = [["#fd8d3c", "#843c39", "#393b79"],
                   ["#f1e2cc", "#cccccc", "#4c78a8"],
                   ["#ffffcc", "#c6bdef", "#3182bd"],]
  
    xBoundaries = [x_bl, x_bh]
    xIdx = 0
    if row["total_pop"] < xBoundaries[0]:
        xIdx = 0
    elif row["total_pop"] < xBoundaries[1]:
        xIdx = 1
    else:
        xIdx = 2
  
    yBoundaries = [y_bl, y_bh]
    yIdx = 0
    if row["fert_13_17"] < yBoundaries[0]:
        yIdx = 2
    elif row["fert_13_17"] < yBoundaries[1]:
        yIdx = 1
    else:
        yIdx = 0
    return colorMatrix[yIdx][xIdx]; 
In [0]:
color = []

for i in range(0, len(df)):
    row = df.iloc[i,:]
    color.append(evaluateForBivariate(row, pop_33, pop_66, fert_33, fert_66))
  
df["color"] = np.array(color)

county = gpd.read_file("UScounties.shp")

county = county.rename(columns={"FIPS":"geoid"})
county["geoid"]= county["geoid"].astype(int)
county = county[["geoid", "geometry"]]

gdf_fert = county.merge(df, on="geoid")

choro_json = json.loads(gdf_fert.to_json())
choro_data = alt.Data(values=choro_json['features'])
In [0]:
gdf_fert
Out[0]:
geoid geometry Unnamed: 0 Unnamed: 0.1 county fert_13_17 fert_08_12 fert_05_09 total_pop med_income Unemployment_rate_2018 poverty_rate gini_idx pct_bl_hs pct_hsdip pct_col_assoc pct_bach_high state region crime_rate white_pct above_mean_wh black_pct above_mean_blk color
0 27077 POLYGON ((-95.34283 48.54668, -95.34105 48.715... 1333 1333 Lake of the Woods County, Minnesota 38.0 78.0 82.0 3841 46943 3.8 9.7 0.3502 10.0 41.6 31.3 17.0 Minnesota Midwest 0.003645 0.935173 1 0.000260 0 #ffffcc
1 53019 POLYGON ((-118.85163 47.94956, -118.84846 48.4... 2927 2927 Ferry County, Washington 78.0 26.0 26.0 7568 41081 11.7 17.3 0.4421 13.0 32.7 36.8 17.5 Washington West 0.006078 0.760967 0 0.000793 0 #fd8d3c
2 53065 POLYGON ((-117.43883 48.04412, -117.54219 48.0... 2950 2950 Stevens County, Washington 44.0 29.0 35.0 43858 47272 7.1 14.8 0.4248 10.6 33.4 37.3 18.6 Washington West 0.008619 0.889119 1 0.004583 0 #c6bdef
3 53047 POLYGON ((-118.97209 47.93915, -118.97406 47.9... 2941 2941 Okanogan County, Washington 53.0 71.0 57.0 41377 42598 6.4 20.4 0.4401 17.7 31.0 33.0 18.3 Washington West 0.017763 0.738043 0 0.004737 0 #cccccc
4 53051 POLYGON ((-117.43858 48.99992, -117.03205 48.9... 2943 2943 Pend Oreille County, Washington 100.0 51.0 60.0 13066 49184 7.2 16.3 0.4367 10.1 30.7 39.1 20.1 Washington West 0.017679 0.903184 1 0.002220 0 #fd8d3c
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
3098 51143 POLYGON ((-79.71720 36.54803, -79.64135 36.846... 2853 2853 Pittsylvania County, Virginia 43.0 54.0 51.0 61970 44356 3.6 12.6 0.4287 17.6 35.6 32.7 14.1 Virginia South 0.005277 0.749250 0 0.211602 1 #3182bd
3099 51590 POLYGON ((-79.51977 36.59777, -79.50138 36.621... 2887 2887 Danville city, Virginia 46.0 57.0 70.0 41911 34951 5.1 24.4 0.4771 19.5 28.4 33.3 18.9 Virginia South 0.025936 0.457207 0 0.491852 1 #c6bdef
3100 2261 MULTIPOLYGON (((-148.74604 60.84614, -148.4734... 83 83 Valdez-Cordova Census Area, Alaska 58.0 61.0 65.0 9439 86019 7.4 9.2 0.3946 4.1 28.0 35.2 32.7 Alaska West 0.009111 0.731857 0 0.001377 0 #f1e2cc
3101 2290 POLYGON ((-161.04770 62.20469, -160.99428 62.8... 85 85 Yukon-Koyukuk Census Area, Alaska 63.0 67.0 63.0 5453 37819 15.3 23.2 0.4684 15.3 41.7 30.6 12.4 Alaska West 0.010270 0.225747 0 0.000917 0 #fd8d3c
3102 8014 POLYGON ((-105.05201 39.99761, -104.99139 40.0... 241 241 Broomfield County, Colorado 44.0 52.0 64.0 64283 85639 2.9 4.6 0.4270 4.2 14.5 27.8 53.4 Colorado West 0.010283 0.861207 1 0.011232 0 #3182bd

3103 rows × 25 columns

In [0]:
pop_33 = np.percentile(df["total_pop"], 33.3)
pop_66 = np.percentile(df["total_pop"], 66.7)

fert_33 = np.percentile(df["fert_13_17"], 33.3)

fert_66 = np.percentile(df["fert_13_17"], 66.7)
In [0]:
color_range = ["#fd8d3c", "#843c39", "#393b79","#f1e2cc", "#cccccc", "#4c78a8", "#ffffcc", "#c6bdef", "#3182bd"]

color_domain = ["#fd8d3c", "#843c39", "#393b79","#f1e2cc", "#cccccc", "#4c78a8", "#ffffcc", "#c6bdef", "#3182bd"]

base = alt.Chart(choro_data).mark_geoshape(
        stroke='black',
        strokeWidth=1
    ).encode(
    ).project(
    type='albersUsa'
    ).properties(
        title={
      "text": "Fertility rate tends to be higher in rural areas",
      "subtitle": ["Fertility rate per 1000 women for each county"],
      "subtitleColor": "black",
      "subtitleFontSize":18,
      "subtitleFontWeight":"bold"
    },width=800,
    height=600
    )

    # Add Choropleth Layer
choro = alt.Chart(choro_data).mark_geoshape().encode(
        color = alt.Color("properties.color:N",
                          scale = alt.Scale(domain=color_domain, range=color_range),
                  title = "Fertility rate per 1000 women"),
         tooltip=['properties.county:O','properties.fert_13_17:Q']
    ).project(
    type='albersUsa'
    )

alt.renderers.enable(embed_options={'theme': 'quartz'})

config = base+choro
config = config.configure_title(
    color='blue',
    fontSize=24,
    fontWeight="bold", 
    anchor="middle"
).configure_legend(
    titleColor='black',
    titleFontSize=12,
    titleFontWeight="bold",
    orient='right',
    fillColor="#ebf5ff",
    padding=10
).configure_axisX(
    labelFontSize=11
).configure_axisY(
    labelFontSize=11)

config
Out[0]:

Above is the choropleth map which shows the fertility rate per 1000 women at county level. The thickness of color expresses how high the fertility rate for each county is. It is a little difficult to read clear pattern from the map, but we still can see that for most of the counties, fertility rate is around 50 per 1000 womens, and counties with relatively high level of fertility(roughly over 100) are seen more in central and western regions.

-Data source: American commmunity survey, Vegas-dataset for county data(Github)

-Encodings: Color and position(location)

In [0]:
df = pd.read_csv("fertility_data2.csv")
df.head()
Out[0]:
Unnamed: 0 Unnamed: 0.1 geoid county fert_13_17 fert_08_12 fert_05_09 total_pop med_income Unemployment_rate_2018 poverty_rate gini_idx pct_bl_hs pct_hsdip pct_col_assoc pct_bach_high state region crime_rate white_pct above_mean_wh black_pct above_mean_blk
0 0 0 1001 Autauga County, Alabama 71.0 41.0 61.0 55036 55317 3.6 13.4 0.4501 12.3 33.6 29.1 25.0 Alabama South 0.020041 0.766026 0 0.190966 1
1 1 1 1003 Baldwin County, Alabama 50.0 64.0 61.0 203360 52562 3.6 10.1 0.4618 9.8 27.8 31.7 30.7 Alabama South 0.015824 0.865268 1 0.095466 1
2 2 2 1005 Barbour County, Alabama 70.0 83.0 70.0 26201 33368 5.2 33.4 0.4622 26.9 35.5 25.5 12.0 Alabama South 0.007557 0.465631 0 0.480440 1
3 3 3 1007 Bibb County, Alabama 74.0 29.0 42.0 22580 43404 4.0 20.2 0.4518 17.9 43.9 25.0 13.2 Alabama South 0.011382 0.767183 0 0.220505 1
4 4 4 1009 Blount County, Alabama 58.0 67.0 60.0 57667 47412 3.5 12.8 0.4302 20.2 32.3 34.4 13.1 Alabama South 0.009052 0.954532 1 0.015867 0
In [0]:
base = alt.Chart(df).properties(
    width=700,
    height=300,
    title={
      "text": "Fertility rate tends to be higher in rural areas",
      "subtitle": ["Fertility rate per 1000 women for each county"],
      "subtitleColor": "black",
      "subtitleFontSize":18,
      "subtitleFontWeight":"bold"
      }
)

color_scale = alt.Scale(domain=['Midwest', 'Northeast', 'South','West', 'nan'],
                        range=['#98df8a', '#ff7f00','#ffed6f','#8624F5','#9d755d'])

hists = base.mark_bar(opacity=0.6, thickness=100).encode(
    x=alt.X('fert_13_17',
            bin=alt.Bin(maxbins=140), # step keeps bin size the same
            scale=alt.Scale(domain=[0,160]),
            axis=alt.Axis(title='fertility_rate')),
    y=alt.Y('count()',
            stack=None,
            scale=alt.Scale(domain=[0,100])),
    color=alt.Color('region:N',scale=color_scale)
).configure_title(
    fontSize=24,
    color="blue",
    fontWeight="bold",
    anchor="middle"
).configure_legend(
    titleColor='black',
    titleFontSize=12,
    titleFontWeight="bold",
    orient='right',
    fillColor="#ebf5ff",
    padding=10
).configure_axisX(
    labelFontSize=10,
    grid=False,
    gridDash=[2,3],
    titleFontWeight="bold"
).configure_axisY(
    labelFontSize=11,
    grid=True,
    gridDash=[2,3],
    titleFontWeight="bold"
)

hists
Out[0]:

2.Stacked area chart for population over age of 65

Load and preprocess data

In [0]:
aging = pd.read_csv("https://docs.google.com/spreadsheets/d/1ggeuLBBugr7yyctZ1qM_evCxKdBYqq_LaDRRRmklsCY/export?format=csv&gid=1073389986", encoding='latin-1')[1:]

use_cols = ["YEAR","HC01_EST_VC01","HC02_EST_VC01","HC02_EST_VC09","HC02_EST_VC10","HC02_EST_VC11",\
            "HC02_EST_VC12", "HC02_EST_VC13", "HC02_EST_VC14", "HC02_EST_VC15", "HC02_EST_VC16", "HC02_EST_VC18"]

aging = aging[use_cols]

aging.columns = ["Year", "total_pop", "total_pop_65", "total_pop65_onerace", "White", "Black",\
                 "Native","Asian","hawai","other", "two_or_more", "Hispanic or Latino"]

aging["Year"]= aging["Year"].astype("int")
aging["White"] = aging["White"].astype("float")
aging["Black"] = aging["Black"].astype("float")
aging["Native"] = aging["Native"].astype("float")
aging["Asian"] = aging["Asian"].astype("float")
aging["hawai"] = aging["hawai"].astype("float")
aging["other"] = aging["other"].astype("float")
aging["two_or_more"] = aging["two_or_more"].astype("float")
aging["Hispanic or Latino"] = aging["Hispanic or Latino"].astype("float")

aging["Others"] = aging["hawai"] + aging["other"] + aging["two_or_more"]

aging_nonhis = aging.drop(["hawai", "other", "two_or_more","Hispanic or Latino"], axis=1)

aging_his = aging[["Year", "Hispanic or Latino"]]
aging_his["Race"] = "Hispanic or Latino"

aging_his
Out[0]:
Year Hispanic or Latino Race
1 2017 8.2 Hispanic or Latino
2 2016 8.0 Hispanic or Latino
3 2015 7.9 Hispanic or Latino
4 2014 7.6 Hispanic or Latino
5 2013 7.4 Hispanic or Latino
6 2012 7.3 Hispanic or Latino
7 2011 7.1 Hispanic or Latino
8 2010 6.9 Hispanic or Latino
9 2009 6.9 Hispanic or Latino
10 2008 6.8 Hispanic or Latino
11 2007 6.6 Hispanic or Latino
12 2006 6.3 Hispanic or Latino
In [0]:
white = aging_nonhis[["Year","White"]]
white["Race"]="White"
white = white.rename(columns={"White":"pop65"})
white = white.sort_values(by=["Year"])

black = aging_nonhis[["Year","Black"]]
black["Race"]="Black"
black = black.rename(columns={"Black":"pop65"})
black = black.sort_values(by=["Year"])

native = aging_nonhis[["Year","Native"]]
native["Race"]="Native"
native = native.rename(columns={"Native":"pop65"})
native = native.sort_values(by=["Year"])

asian = aging_nonhis[["Year","Asian"]]
asian["Race"]="Asian"
asian = asian.rename(columns={"Asian":"pop65"})
asian = asian.sort_values(by=["Year"])

other = aging_nonhis[["Year","Others"]]
other["Race"]="Others"
other = other.rename(columns={"Others":"pop65"})
other = other.sort_values(by=["Year"])

aging_conc_nonhis = pd.concat([white, black, native, asian, other])

Below the stacked area chart shows how the racial structure of population over age of 65 changed by the year. We can see that through entire period the majority of population over 65 years old have been white, over 72 percent. However, the proportion of white slightly have decreased and the proportion of asian has been increased little by little.

In [0]:
col_domain = ["White", "Black", "Native", "Asian", "Others"]

col_range = ["yellow", "blue", "red", "green", "pink"]

chart1 = alt.Chart(aging_conc_nonhis).mark_area().encode(
    x="Year:N",
    y=alt.Y("pop65:Q",
    scale=alt.Scale(domain=[0,100]),
    axis=alt.Axis(title="Population of people over65(%)")
    ),
    color=alt.Color("Race:N",
                    scale= alt.Scale(domain=col_domain, range=col_range))
).properties(
    width=500,
    height=400,
    title={
      "text": "Race of elderly people is being gradually diverged",
      "subtitle": ["Racial structure for people over 65 years old in US(2006-2017)"],
      "subtitleColor": "black",
      "subtitleFontSize":18,
      "subtitleFontWeight":"bold"
      }
)

chart2 = alt.Chart(aging_his).mark_area().encode(
    x="Year:N",
    y="Hispanic or Latino:Q",
    color=alt.Color("Race:N",
                    scale = alt.Scale(range=["orange"]),
                    legend=alt.Legend(title=None)))


chart = (chart1 + chart2).resolve_scale(color='independent')

config = chart.configure_title(
    fontSize=24,
    color="blue",
    fontWeight="bold",
    anchor="middle"
).configure_legend(
    titleColor='black',
    titleFontSize=13,
    labelFontSize=11,
    orient='right',
    fillColor="#ebf5ff",
    padding=10
).configure_axisX(
    titleFontSize=13,
    titleFontWeight="bold",
    labelFontSize=11,
    labelAngle=-45
).configure_axisY(
    titleFontSize=13,
    titleFontWeight="bold",
    labelFontSize=11)

config
Out[0]:

Data source: American community survey

Encodings: Color, Area

3.Scatter plot for the relationship of income level and fertility(Revised)

Load data and preprocessing

Here the scatter plot below shows the relationship of median income and fertility rate, as well as the size of total population for each county. The plot seems basically scattered flat, but we might be able to see the slightly negative correlation between income and fertility rate.

In [0]:
chart = alt.Chart(gdf_fert, title="Relationship between fertility rate and median income"
).mark_circle(size=20).encode(
    x='med_income',
    y='fert_13_17',
    color="region",
    tooltip=['med_income', "gini_idx", 'fert_13_17']
)

alt.renderers.enable(embed_options={'theme': 'quartz'})

config = chart.configure_title(fontSize=16, anchor="middle"
).configure_legend(titleColor='black', titleFontSize=13
                   ).configure_axisX(labelFontSize=11
                   ).configure_axisY(labelFontSize=11)

config
Out[0]:

-Data source: American community survey

-Encodings: Position, size

4.Slope graph for fertility rate at division level

Load data and preprocessing

In [0]:
fert_vis_conc = pd.concat([fert_vis07, fert_vis17])

The slope graph below shows how fertility rate changed between 2007 and 2017 for division level. We can see that for all divisions, the fertility rate declined, and for some divisions such as East North Central and South atlantic, the fertility rate declined sharpely.

Data source: American community survey

Encodings: Color, position, angle

5.Ridgeline plot for fertility rate

Load data and preprocessing

In [0]:
urls = ["https://docs.google.com/spreadsheets/d/1sUA47Q8OrH18YI4q_DOP7OWIoHX2i8kH0fk5E4lFszE/export?format=csv&gid=1048618207",
        "https://docs.google.com/spreadsheets/d/18KT-PC9IXKqYL-wd5zHHzYV4K8eZ9mgUe98XaFANMQM/export?format=csv&gid=2072191966", 
        "https://docs.google.com/spreadsheets/d/1p_fq_sG3nYnHdOkLVglxXF2zmT1XrPK0c-538lcFIAg/export?format=csv&gid=1688370404",
        "https://docs.google.com/spreadsheets/d/1ymCdsBHgamJis3_2PK0IZS_VZvls5Umn-ng9o4S0Sro/export?format=csv&gid=119942101",
        "https://docs.google.com/spreadsheets/d/1AqUeUfzuAUG5pCLJGbrysCcTxICCrHsA8rRE1ffHZpU/export?format=csv&gid=1384942932",
        "https://docs.google.com/spreadsheets/d/1zvfu9Nc27aGo2ovAn2kqofv-iVXO61-n-dQ4DIrpbUs/export?format=csv&gid=2105631292",
        "https://docs.google.com/spreadsheets/d/1XAXIQeLrgxgEx6CCHxWOmfgmInRxwCxu-4vw1PR8dp8/export?format=csv&gid=1064535711",
        "https://docs.google.com/spreadsheets/d/1xad1XOm-J4T13rYFJ7BBeskaaCV88BAQZExqNX3aX0w/export?format=csv&gid=95139586",
        "https://docs.google.com/spreadsheets/d/1pBj5q5NkzpZUKeOqFZCGb0RdzV1mvS8rw2iXWcwYKbc/export?format=csv&gid=616415653",
        "https://docs.google.com/spreadsheets/d/1dZBbXEKSRqhR8Yy9_zFBAjzdw_Gyu3czYT0GNcGhrZI/export?format=csv&gid=687388009",
        "https://docs.google.com/spreadsheets/d/1pReietRze4QKpMOM6dbJeJ4bdekcGgrxcaC5k4Qv8C8/export?format=csv&gid=2108461693",
        "https://docs.google.com/spreadsheets/d/1KkqQ8U4vL5Tjy7Cq16FQD7xi3qnfPnyYiMYQuiJ6Hkc/export?format=csv&gid=957537605"]

dfs =[]

for url in urls:
  df = pd.read_csv(url, encoding='latin-1')[1:]
  dfs.append(df)

pop65_conc = pd.concat(dfs)

col_use = ["date", "HC01_EST_VC01", "HC02_EST_VC01"]

pop65_conc = pop65_conc[col_use]

pop65_conc["HC01_EST_VC01"]=pop65_conc["HC01_EST_VC01"].astype("int")
pop65_conc["HC02_EST_VC01"]=pop65_conc["HC02_EST_VC01"].astype("int")

pop65_conc["pop65_rate"] = pop65_conc["HC02_EST_VC01"]/pop65_conc["HC01_EST_VC01"]

pop65_conc["date"] =pd.to_datetime(pop65_conc["date"])
pop65_conc["year"] = pop65_conc["date"].dt.year
pop65_conc = pop65_conc.drop("date", axis=1)
pd.set_option('display.max_rows', 10000)
pop65_conc["year"] = pop65_conc["year"].astype("str") 

The ridgeline plot below shows how the distribution of population proportion over age 65 changed from 2006 to 2017 for county levels. We can see that, in earlier period, the proportion of people over age of 65 was more concentrated around the mean of about 15 % and there were very limited number of counties which recorded over 20 %. Then, in recent years, we can see that the tail becomes longer and the proportion is distributed toward higher rate, and there are some part of counties which record over 24 % of population aging.

6. Fertility rate for counties with different race structure(New!)

Process data

In [0]:
county_pts = county

county_pts["lon_lat"]= county_pts.centroid
county_pts = county_pts[["geoid", "lon_lat"]]
county_pts = county_pts.rename(columns={"lon_lat":"geometry"})

gdf_pts = county_pts.merge(df, on="geoid")

gdf_pts = gdf_pts[gdf_pts["state"] != "Alaska"]
gdf_pts = gdf_pts[gdf_pts["state"] != "Hawaii"]

gdf_ab_wh = gdf_pts[gdf_pts["above_mean_wh"]==1]
gdf_bl_wh = gdf_pts[gdf_pts["above_mean_wh"]==0]
gdf_ab_wh = gpd.GeoDataFrame(gdf_ab_wh)
gdf_bl_wh = gpd.GeoDataFrame(gdf_bl_wh)

gdf_ab_wh["region"] = gdf_ab_wh["region"].astype(str)
gdf_bl_wh["region"] = gdf_bl_wh["region"].astype(str)

Below we examine how the racial structure of each county affect the fertility rate. The above plot is for counties with higher percent of white prople than average of all counties, and the plot below is for counties with lower percent of white people. We can see that in mid-west region there are not many counties with lower percent of white people, but such counties with fewer white people seem to record relatively high fertility rate. Also, in south region, many of the counties have lower percent of white people, and most of them record average or higher level of fertility rate. On the other hand, in west region and noth-east region, some of the counties with high percent of white people show very low fertility rate.

In [0]:
import geoplot.crs as gcrs

alt.renderers.enable(embed_options={'theme': 'quartz'})

contiguous_usa = gpd.read_file(gplt.datasets.get_path('contiguous_usa'))
fig = plt.figure(figsize=(20,15))
proj = projection=gcrs.AlbersEqualArea(central_latitude=45.7128, central_longitude=-100.0059)
ax1 = plt.subplot(211, projection=proj)
ax2 = plt.subplot(212, projection=proj)

ax1 = gplt.pointplot(gdf_ab_wh, projection=proj, scale="fert_13_17", hue='region',
                     cmap='Set1',limits=(1, 20), legend=True, legend_var='scale',
                     edgecolor='white', linewidth=0.5, ax=ax1)

ax2 = gplt.pointplot(gdf_bl_wh, projection=proj, scale="fert_13_17", hue='region',
                     cmap='Set1',limits=(1, 20),legend=True, legend_var='scale',
                     edgecolor='white', linewidth=0.5, ax=ax2)

gplt.polyplot(contiguous_usa,edgecolor='white', facecolor='lightgray',
     ax=ax1)
ax1.set_title("Fertility rate for county with high pct of white, 2013-2017",
              fontdict={'fontsize': 16})

gplt.polyplot(contiguous_usa,edgecolor='white', facecolor='lightgray',
     ax=ax2)
ax2.set_title("Fertility rate for county with low pct of white, 2013-2017",
              fontdict={'fontsize': 16})
Out[0]:
Text(0.5, 1.0, 'Fertility rate for county with low pct of white, 2013-2017')

7. Histogram for fertility rate with different regions(New!)

The histogram below shows the distribution of fertility rate for five different regions. Although absolute number of records are different, there are not big difference between each region in terms of distribution of fertility rate. For most of regions fertility rate is normally distributed around the peak of about 50 per 1000 women.

In [0]:
base = alt.Chart(df).properties(
    width=700,
    height=300,
    title={
      "text": ["Fertility rate distribution by region"], 
      "color": "black",
    }
)

color_scale = alt.Scale(domain=['Midwest', 'Northeast', 'South','West', 'nan'],
                        range=['#98df8a', '#ff7f00','#ffed6f','#8624F5','#9d755d'])

hists = base.mark_bar(opacity=0.6, thickness=100).encode(
    x=alt.X('fert_13_17',
            bin=alt.Bin(maxbins=140), # step keeps bin size the same
            scale=alt.Scale(domain=[0,250]),
            axis=alt.Axis(title='fertility_rate', grid=False)),
    y=alt.Y('count()',
            stack=None,
            scale=alt.Scale(domain=[0,100])),
    color=alt.Color('region:N',scale=color_scale)
).configure_title(fontSize=16, anchor="middle"
).configure_legend(titleColor='black', titleFontSize=13
                   ).configure_axisX(labelFontSize=11
                   ).configure_axisY(labelFontSize=11)
hists
Out[0]:

8. Heatmap for fertility and and some variables(New!)

The heat map below shows how fertility rate is correlated with some demographic indicators for those areas.Here we examine the relationship with poverty rate and the percent of people who do not graduate from high school. From the map we cannot see clear correlation between fertility rate and these variables, though non-high school graduate rate might be negatively correlated with fertility rate.

In [0]:
import altair as alt

a = alt.Chart(df).properties(
    width=300,
    height=200,
    title={
      "text": ["Fertility rate vs poverty rate"], 
      "color": "black",
    }
).mark_rect().encode(
    alt.X('poverty_rate:Q', bin=alt.Bin(maxbins=60),
          axis=alt.Axis(title='poverty_rate', grid=True)),
    alt.Y('fert_13_17:Q', bin=alt.Bin(maxbins=40),
          axis=alt.Axis(title='fertility_rate', grid=True)),
    alt.Color("count()", scale=alt.Scale(scheme='inferno'))
)

b = alt.Chart(df).properties(
    width=300,
    height=200,
    title={
      "text": ["Fertility rate vs non-high school degree rate"], 
      "color": "black",
    }
).mark_rect().encode(
    alt.X('pct_bl_hs:Q', bin=alt.Bin(maxbins=60),
          axis=alt.Axis(title='non-high schoolgrad-rate', grid=True)),
    alt.Y('fert_13_17:Q', bin=alt.Bin(maxbins=40),
          axis=alt.Axis(title='fertility_rate', grid=True)),
    alt.Color("count()", scale=alt.Scale(scheme='inferno'),
              legend=alt.Legend(title='Total Records'))
)

c = a|b

c.configure_title(fontSize=16, anchor="middle"
).configure_legend(titleColor='black', titleFontSize=13
                   ).configure_axisX(labelFontSize=11
                   ).configure_axisY(labelFontSize=11)
Out[0]:
In [0]:
source
Out[0]:
'https://vega.github.io/vega-datasets/data/seattle-weather.csv'
In [0]:
 
/content/drive/My Drive/Datavisualization
In [0]:
 
Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive